####################################################
####          Replication Code for              ####
####     Bayesian versus maximum likelihood     ####
####      estimation of treatment effects       ####
####          in bivariate probit               ####  
####       instrumentalvariable models          ####    
####################################################
####                                            ####  
####                3/ 7/2017                   ####         
####     this file runs rcode to create Figures ####  
####   in  Main text and Appendix G            ####  
####################################################



#### recreate simulation parameters
N_seq = c(50, 100, 250, 500)
cor = c(0.25,0.5,0.75)
IV_strength = c(0.5, 1, 1.5, 2)
treatment = c(0, 1, 2, 3, 4)
simulation_para = data.frame(do.call(rbind,replicate(500,as.matrix(expand.grid(N_seq, cor, IV_strength, treatment)),simplify = F)))
names(simulation_para) = c("N","cor","IVcoef","treatment")
simulation_para = simulation_para[order(simulation_para$N, simulation_para$cor, simulation_para$IVcoef, simulation_para$treatment),]
simulation_para$iteration = seq(1,120000)

### load Bayesian model results with prior (0, 2.5) and Stata output
load("R_output25.rda")
load("stata_output.rda")
### stata bootstrapped CIs for ATE
load("ate_estimates.rda")
names(ate_est)[1] = "iteration"


##combine stata and bayes results by iteration
stata = data.frame(stata_output[,c("iteration", "ATE")])
names(stata) = c("iteration", "StataAte")
stata = join(stata, ate_est[, c("iteration", "Lower","Upper")], by = "iteration")
names(stata) <- c("iteration", "StataAte", "StataLow95", "StataHigh95")


Bayes_res = data.frame(t(R_output25["ate",c("mean","median", "2.5%","97.5%", "TRUE"),]), (R_output25["iteration","Rhat/Misc",]))
names(Bayes_res) = c("BayesMean","BayesMedian", "BayesLow95","BayesHigh95%", "TrueAte","iteration")



sim_results = join(Bayes_res, stata, by = "iteration", type = "full")
sim_results = join(sim_results, simulation_para, by = "iteration", type = "full")

### create new dataset that has Bayesian Mean, Bayesian Median, 95% credible interval, Stata mean, 95% CI, 
### true ATE, number of observations, correlation, iv strength,
### drops those sims for which stata has no results or bootstrapped CI did not compute
                                        #sim_results = data.frame(as.numeric(as.character(R_output25[2, 1, stata$fileNum])), as.numeric(as.character(R_output25[2, 5, stata$fileNum])), as.numeric(as.character(R_output25[2, 2, stata$fileNum])), as.numeric(as.character(R_output25[2, 8, stata$fileNum])), stata_output$ATE[stata$fileNum], stata$Lower, stata$Upper, as.numeric(as.character(R_output25[2, 9, stata$fileNum])), simulation_para[stata$fileNum,]) 
#names(sim_results) = c("BayesMean", "BayesMedian", "BayesLow95", "BayesHigh95", "StataAte", "StataLow95", "StataHigh95", "TrueAte", "N", "Correlation", "IVcoef", "TrueTreatment")


### create indicator whether 95CI includes true value
sim_results$Bayes95Contain.25 = ifelse(sim_results$TrueAte <= sim_results$BayesHigh95 & sim_results$TrueAte >= sim_results$BayesLow95, 1, 0)
sim_results$Stata95Contain = ifelse(sim_results$TrueAte <= sim_results$StataHigh & sim_results$TrueAte >= sim_results$StataLow, 1, 0)

### calculate errors
sim_results$Stata_abs_error = abs(sim_results$StataAte - sim_results$TrueAte)
sim_results$Bayes_abs_error = abs(sim_results$BayesMedian - sim_results$TrueAte)


#### check for any simulations for stata for which we couldn't calculate the CI
dim(sim_results[is.na(sim_results$StataHigh) == T & is.na(sim_results$Stata_abs_error)==FALSE,])
dim(sim_results[is.na(sim_results$Stata_abs_error)==T,])

### drop them 
sim_results$StataAte = ifelse(is.na(sim_results$StataHigh) == T  & is.na(sim_results$Stata_abs_error)==FALSE, NA,sim_results$StataAte)#3499


### analyze results
#### by N and IV strength
N = c(50, 100, 250, 500)
IVs =  c(0.5, 1, 1.5, 2.0)
Bayes_mse_N = stata_mse_N = Bayes_mae_N = stata_mae_N  = Bayes_pct95_N  = stata_pct95_N = n_val = iv_val = NULL

for(i in 1: 4){
    for(j in 1:4){
        Bayes_mse_N = c(Bayes_mse_N, mean_sq(sim_results$BayesMedian[sim_results$N == N[i] & sim_results$IVcoef == IVs[j]] , sim_results$TrueAte[sim_results$N == N[i] & sim_results$IVcoef == IVs[j]]))
        stata_mse_N =  c(stata_mse_N , mean_sq(sim_results$StataAte[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataAte)==FALSE] , sim_results$TrueAte[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataAte)==FALSE]))
        Bayes_mae_N =  c(Bayes_mae_N, MAE(sim_results$BayesMean[sim_results$N == N[i] & sim_results$IVcoef == IVs[j]] , sim_results$TrueAte[sim_results$N == N[i] & sim_results$IVcoef == IVs[j]]))
        stata_mae_N =  c(stata_mae_N, MAE(sim_results$StataAte[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataAte)==FALSE] , sim_results$TrueAte[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataAte)==FALSE]))
        Bayes_pct95_N = c(Bayes_pct95_N, mean(sim_results$Bayes95Contain[sim_results$N == N[i] & sim_results$IVcoef == IVs[j]]))
        stata_pct95_N = c(stata_pct95_N, mean(sim_results$Stata95Contain[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataAte)==FALSE]))
        n_val = c(n_val, N[i])
        iv_val = c(iv_val, IVs[j])
        }
}       



data_plot = data.frame(mse = c(Bayes_mse_N, stata_mse_N), fordiff = c(stata_mse_N, Bayes_mse_N), c95 = c(Bayes_pct95_N, stata_pct95_N), iv= rep(iv_val,2),n = rep(n_val,2), method = c(rep("Bayes",16),rep("Stata",16)))
data_plot$diff = abs(data_plot$mse - data_plot$fordiff)

### prepare Main manuscript Figure 1
p1 = ggplot(data = data_plot[data_plot$n == 50,],aes(y = mse, x = iv))
p1 = p1 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p1 = p1 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p1 = p1 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p1 = p1 + labs(title = "N = 50", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p1 = p1 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p1 = p1 + theme(legend.position = c(.8, .87), legend.direction = "horizontal", legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 15), legend.title = element_text(size = 15), panel.border = element_rect(colour = "black", fill=NA, size=2.5)) #+ scale_y_continuous(limits = c(0, 6.5))
plot(p1)
leg = g_legend(p1)
p1 = p1 + theme(legend.position="none")

p2 = ggplot(data = data_plot[data_plot$n == 100,],aes(y = mse, x = iv))
p2 = p2 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p2 = p2 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p2 = p2 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p2 = p2 + labs(title = "N = 100", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p2 = p2 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p2 = p2 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p2)


p3 = ggplot(data = data_plot[data_plot$n == 250,],aes(y = mse, x = iv))
p3 = p3 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p3 = p3 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p3 = p3 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p3 = p3 + labs(title = "N = 250", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p3 = p3 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p3 = p3 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p3)

p4 = ggplot(data = data_plot[data_plot$n == 500,],aes(y = mse, x = iv))
p4 = p4 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p4 = p4 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p4 = p4 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p4 = p4 + labs(title = "N = 500", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p4 = p4 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p4 = p4 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p4)

pdf("Figure1.pdf", width = 12, height = 12 )
grid.newpage()
pushViewport(viewport(layout = grid.layout(4,3, heights = unit(c(11,1, 11, 2), "null"), width = unit(c(11,1, 11), "null"))))   
print(p1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))    
print(p2, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))         
print(p3, vp = viewport(layout.pos.row = 3, layout.pos.col = 1))         
print(p4, vp = viewport(layout.pos.row = 3, layout.pos.col = 3))
leg$vp = viewport(layout.pos.row = 4, layout.pos.col = c(1:3))
grid.draw(leg) 
dev.off()
# 


### now same plot for mae
### this plot is in the Appendix section G
data_plot = data.frame(mae = c(Bayes_mae_N, stata_mae_N), fordiff = c(stata_mae_N, Bayes_mae_N), c95 = c(Bayes_pct95_N,stata_pct95_N), iv= rep(iv_val,2),n = rep(n_val,2), method = c(rep("Bayes",16),rep("Stata",16)))
data_plot$diff = abs(data_plot$mae - data_plot$fordiff)


p1 = ggplot(data = data_plot[data_plot$n == 50,],aes(y = mae, x = iv))
p1 = p1 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p1 = p1 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p1 = p1 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p1 = p1 + labs(title = "N = 50", y="MAE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p1 = p1 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p1 = p1 + theme(legend.position = c(.8, .87), legend.direction = "horizontal", legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 15), legend.title = element_text(size = 15), panel.border = element_rect(colour = "black", fill=NA, size=2.5)) #+ scale_y_continuous(limits = c(0, 6.5))
plot(p1)
leg = g_legend(p1)
p1 = p1 + theme(legend.position="none")

p2 = ggplot(data = data_plot[data_plot$n == 100,],aes(y = mae, x = iv))
p2 = p2 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p2 = p2 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p2 = p2 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p2 = p2 + labs(title = "N = 100", y="MAE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p2 = p2 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p2 = p2 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p2)


p3 = ggplot(data = data_plot[data_plot$n == 250,],aes(y = mae, x = iv))
p3 = p3 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p3 = p3 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p3 = p3 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p3 = p3 + labs(title = "N = 250", y="MAE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p3 = p3 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p3 = p3 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p3)

p4 = ggplot(data = data_plot[data_plot$n == 500,],aes(y = mae, x = iv))
p4 = p4 + geom_rect(aes(xmax = iv + 0.05, xmin = iv - 0.05, ymin = rep(0, 8), ymax = diff), fill = "gray71")
p4 = p4 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p4 = p4 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p4 = p4 + labs(title = "N = 500", y="MAE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p4 = p4 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p4 = p4 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p4)


pdf("Figure_G1.pdf", width = 12, height = 12 )
grid.newpage()
pushViewport(viewport(layout = grid.layout(4,3, heights = unit(c(11,1, 11, 2), "null"), width = unit(c(11,1, 11), "null"))))   
print(p1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))    
print(p2, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))         
print(p3, vp = viewport(layout.pos.row = 3, layout.pos.col = 1))         
print(p4, vp = viewport(layout.pos.row = 3, layout.pos.col = 3))
leg$vp = viewport(layout.pos.row = 4, layout.pos.col = c(1:3))
grid.draw(leg) 
dev.off()
# 

### now plot 95% coverage
### Main Manuscript Figure 2
p1 = ggplot(data = data_plot[data_plot$n == 50,],aes(y = c95, x = iv))
p1 = p1 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p1 = p1 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p1 = p1 + labs(title = "N = 50", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p1 = p1 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p1 = p1 + geom_hline(yintercept = 0.95, colour = I("RED"), size = 1)
p1 = p1 + geom_hline(yintercept = 1, colour = I("BLUE"), size = 1) + scale_y_continuous(limits = c(0.6, 1.005), breaks = c(0.7,0.8,0.9,0.95,1.0),labels = c("0.7","0.8","0.9","0.95","1.0"))
p1 = p1 + theme(legend.position = c(.8, .87), legend.direction = "horizontal", legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 15), legend.title = element_text(size = 15), panel.border = element_rect(colour = "black", fill=NA, size=2.5)) #+ scale_y_continuous(limits = c(0, 6.5))
plot(p1)
leg = g_legend(p1)
p1 = p1 + theme(legend.position="none")

p2 = ggplot(data = data_plot[data_plot$n == 100,],aes(y = c95, x = iv))
p2 = p2 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p2 = p2 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p2 = p2 + labs(title = "N = 100", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p2 = p2 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p2 = p2 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
p2 = p2 + geom_hline(yintercept = 0.95, colour = I("RED"), size = 1)
p2 = p2 + geom_hline(yintercept = 1, colour = I("BLUE"), size = 1) + scale_y_continuous(limits = c(0.6, 1.005), breaks = c(0.7,0.8,0.9,0.95,1.0),labels = c("0.7","0.8","0.9","0.95","1.0"))
plot(p2)


p3 = ggplot(data = data_plot[data_plot$n == 250,],aes(y = c95, x = iv))
p3 = p3 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p3 = p3 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p3 = p3 + labs(title = "N = 250", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p3 = p3 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p3 = p3 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
p3 = p3 + geom_hline(yintercept = 0.95, colour = I("RED"), size = 1)
p3 = p3 + geom_hline(yintercept = 1, colour = I("BLUE"), size = 1) + scale_y_continuous(limits = c(0.75, 1.005), breaks = c(0.8,0.9,0.95,1.0),labels = c("0.8","0.9","0.95","1.0"))
plot(p3)

p4 = ggplot(data = data_plot[data_plot$n == 500,],aes(y = c95, x = iv))
p4 = p4 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p4 = p4 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p4 = p4 + labs(title = "N = 500", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p4 = p4 + scale_colour_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c("darkblue", "darkred")) + scale_shape_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(19, 17)) + scale_linetype_manual(name = "Estimation Method", labels = c("Bayesian","Stata MLE"), values = c(1, 3))
p4 = p4 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
p4 = p4 + geom_hline(yintercept = 0.95, colour = I("RED"), size = 1)
p4 = p4 + geom_hline(yintercept = 1, colour = I("BLUE"), size = 1) + scale_y_continuous(limits = c(0.75, 1.005), breaks = c(0.8,0.9,0.95,1.0),labels = c("0.8","0.9","0.95","1.0"))
plot(p4)


pdf("Figure2.pdf", width = 12, height = 12 )
grid.newpage()
pushViewport(viewport(layout = grid.layout(4,3, heights = unit(c(11,1, 11, 2), "null"), width = unit(c(11,1, 11), "null"))))   
print(p1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))    
print(p2, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))         
print(p3, vp = viewport(layout.pos.row = 3, layout.pos.col = 1))         
print(p4, vp = viewport(layout.pos.row = 3, layout.pos.col = 3))
leg$vp = viewport(layout.pos.row = 4, layout.pos.col = c(1:3))
grid.draw(leg) 
dev.off()
# 

#### Calculate statistics that are discussed in the paper on page 10
### MSE's and their difference for the parameter combinations shown in plot
data_plot = data.frame(mse = c(Bayes_mse_N, stata_mse_N), fordiff = c(stata_mse_N, Bayes_mse_N), c95 = c(Bayes_pct95_N,stata_pct95_N), iv= rep(iv_val,2),n = rep(n_val,2), method = c(rep("Bayes",16),rep("Stata",16)))
data_plot$diff = abs(data_plot$mse - data_plot$fordiff)

### maximum &  minimum coverage as in paper for Bayesian model 
max(data_plot$c95[data_plot$method== "Bayes"])
min(data_plot$c95[data_plot$method== "Bayes"])

## how many of the coverage stats are between 0.94 and 0.98 for Bayesian model
dim(data_plot[data_plot$method== "Bayes" & data_plot$c95 >= 0.94 & data_plot$c95 <= 0.97,])
dim(data_plot[data_plot$method== "Bayes",])


### maximum &  minimum coverage as in paper for MLE model 
min(data_plot$c95[data_plot$method== "Stata"])
data_plot[data_plot$c95 == min(data_plot$c95[data_plot$method== "Stata"]),]

max(data_plot$c95[data_plot$method== "Stata"])
data_plot[data_plot$c95 == max(data_plot$c95[data_plot$method== "Stata"]),]
## how many greater 0.94 for MLE 
dim(data_plot[data_plot$method== "Stata" & data_plot$c95 >= 0.9,])
